1
1
import os
2
2
import pickle
3
- import platform
4
3
import shutil
5
4
import socket
6
5
import sqlite3
@@ -281,7 +280,7 @@ def test_run_and_parse_picklepatch() -> None:
281
280
cursor .execute (
282
281
"SELECT function_name, class_name, module_name, file_path, benchmark_function_name, benchmark_module_path, benchmark_line_number FROM benchmark_function_timings ORDER BY benchmark_module_path, benchmark_function_name, function_name" )
283
282
function_calls = cursor .fetchall ()
284
-
283
+
285
284
# Assert the length of function calls
286
285
assert len (function_calls ) == 2 , f"Expected 2 function calls, but got { len (function_calls )} "
287
286
function_benchmark_timings = codeflash_benchmark_plugin .get_function_benchmark_timings (output_file )
@@ -327,6 +326,7 @@ def test_run_and_parse_picklepatch() -> None:
327
326
assert actual [4 ] == expected [4 ], f"Mismatch at index { idx } for benchmark_function_name"
328
327
assert actual [5 ] == expected [5 ], f"Mismatch at index { idx } for benchmark_module_path"
329
328
assert actual [6 ] == expected [6 ], f"Mismatch at index { idx } for benchmark_line_number"
329
+ conn .close ()
330
330
331
331
# Generate replay test
332
332
generate_replay_test (output_file , replay_tests_dir )
@@ -372,7 +372,7 @@ def test_run_and_parse_picklepatch() -> None:
372
372
test_env ["CODEFLASH_TEST_ITERATION" ] = "0"
373
373
test_env ["CODEFLASH_LOOP_INDEX" ] = "1"
374
374
test_type = TestType .REPLAY_TEST
375
- replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket "
375
+ replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket_test_socket_picklepatch "
376
376
func_optimizer = opt .create_function_optimizer (func )
377
377
func_optimizer .test_files = TestFiles (
378
378
test_files = [
@@ -396,7 +396,7 @@ def test_run_and_parse_picklepatch() -> None:
396
396
)
397
397
assert len (test_results_unused_socket ) == 1
398
398
assert test_results_unused_socket .test_results [0 ].id .test_module_path == "code_to_optimize.tests.pytest.benchmarks_socket_test.codeflash_replay_tests.test_code_to_optimize_tests_pytest_benchmarks_socket_test_test_socket__replay_test_0"
399
- assert test_results_unused_socket .test_results [0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket "
399
+ assert test_results_unused_socket .test_results [0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket_test_socket_picklepatch "
400
400
assert test_results_unused_socket .test_results [0 ].did_pass == True
401
401
402
402
# Replace with optimized candidate
@@ -440,16 +440,14 @@ def bubble_sort_with_unused_socket(data_container):
440
440
assert new_test is not None
441
441
replay_test_path .write_text (new_test )
442
442
443
- # Run test for original function code that uses the socket. This test should pass because
444
- # the PicklePlaceholderAccessError is thrown as expected behavior, which the test framework
445
- # treats as a successful test execution (the exception is the expected outcome).
443
+ # Run test for original function code that uses the socket. This should fail, as the PicklePlaceholder is accessed.
446
444
test_env = os .environ .copy ()
447
445
test_env ["CODEFLASH_TEST_ITERATION" ] = "0"
448
446
test_env ["CODEFLASH_LOOP_INDEX" ] = "1"
449
447
test_type = TestType .REPLAY_TEST
450
448
func = FunctionToOptimize (function_name = "bubble_sort_with_used_socket" , parents = [],
451
449
file_path = Path (fto_used_socket_path ))
452
- replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket "
450
+ replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket_test_used_socket_picklepatch "
453
451
func_optimizer = opt .create_function_optimizer (func )
454
452
func_optimizer .test_files = TestFiles (
455
453
test_files = [
@@ -477,8 +475,8 @@ def bubble_sort_with_unused_socket(data_container):
477
475
assert test_results_used_socket .test_results [
478
476
0 ].id .test_module_path == "code_to_optimize.tests.pytest.benchmarks_socket_test.codeflash_replay_tests.test_code_to_optimize_tests_pytest_benchmarks_socket_test_test_socket__replay_test_0"
479
477
assert test_results_used_socket .test_results [
480
- 0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket "
481
- assert test_results_used_socket .test_results [0 ].did_pass is True
478
+ 0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket_test_used_socket_picklepatch "
479
+ assert test_results_used_socket .test_results [0 ].did_pass is False
482
480
print ("test results used socket" )
483
481
print (test_results_used_socket )
484
482
# Replace with optimized candidate
@@ -508,27 +506,15 @@ def bubble_sort_with_used_socket(data_container):
508
506
assert test_results_used_socket .test_results [
509
507
0 ].id .test_module_path == "code_to_optimize.tests.pytest.benchmarks_socket_test.codeflash_replay_tests.test_code_to_optimize_tests_pytest_benchmarks_socket_test_test_socket__replay_test_0"
510
508
assert test_results_used_socket .test_results [
511
- 0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket"
512
-
513
- # TODO: Fix socket behavior differences on Windows
514
- # On Windows, socket behavior differs from Unix platforms, causing tests to pass instead of fail
515
- if platform .system () == "Windows" :
516
- # On Windows, the test passes when it should fail due to socket behavior differences
517
- assert test_results_used_socket .test_results [0 ].did_pass is True
518
- assert optimized_test_results_used_socket .test_results [0 ].did_pass is True
519
- # Since both pass on Windows, comparison should return True
520
- assert compare_test_results (test_results_used_socket , optimized_test_results_used_socket ) is True
521
- else :
522
- # On Unix platforms, the expected behavior is that the test fails
523
- assert test_results_used_socket .test_results [0 ].did_pass is False
524
- assert optimized_test_results_used_socket .test_results [0 ].did_pass is True
525
- # Even though tests threw the same error, we reject this as the behavior of the unpickleable object could not be determined.
526
- assert compare_test_results (test_results_used_socket , optimized_test_results_used_socket ) is False
509
+ 0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket_test_used_socket_picklepatch"
510
+ assert test_results_used_socket .test_results [0 ].did_pass is False
511
+
512
+ # Even though tests threw the same error, we reject this as the behavior of the unpickleable object could not be determined.
513
+ assert compare_test_results (test_results_used_socket , optimized_test_results_used_socket ) is False
527
514
528
515
finally :
529
516
# cleanup
530
517
output_file .unlink (missing_ok = True )
531
518
shutil .rmtree (replay_tests_dir , ignore_errors = True )
532
519
fto_unused_socket_path .write_text (original_fto_unused_socket_code )
533
520
fto_used_socket_path .write_text (original_fto_used_socket_code )
534
-
0 commit comments