@@ -66,7 +66,6 @@
 from codeflash.result.critic import coverage_critic, performance_gain, quantity_of_tests_critic, speedup_critic
 from codeflash.result.explanation import Explanation
 from codeflash.telemetry.posthog_cf import ph
-from codeflash.verification.bayesian_analysis import compare_function_runtime_distributions
 from codeflash.verification.concolic_testing import generate_concolic_tests
 from codeflash.verification.equivalence import compare_test_results
 from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture
@@ -79,9 +78,6 @@
 if TYPE_CHECKING:
     from argparse import Namespace

-    import numpy as np
-    import numpy.typing as npt
-
     from codeflash.either import Result
     from codeflash.models.models import CoverageData, FunctionSource, OptimizedCandidate

@@ -369,12 +365,7 @@ def optimize_function(
             cleanup_paths(paths_to_cleanup)
             return Failure(baseline_result.failure())

-        (
-            original_code_baseline,
-            original_code_runtime_distribution,
-            original_code_runtime_statistics,
-            test_functions_to_remove,
-        ) = baseline_result.unwrap()
+        original_code_baseline, test_functions_to_remove = baseline_result.unwrap()
         if isinstance(original_code_baseline, OriginalCodeBaseline) and not coverage_critic(
             original_code_baseline.coverage_results, self.args.test_framework
         ):
@@ -393,7 +384,6 @@ def optimize_function(
             function_to_optimize=function_to_optimize,
             original_code=validated_original_code[function_to_optimize.file_path].source_code,
             original_code_baseline=original_code_baseline,
-            original_code_runtime_distribution=original_code_runtime_distribution,
             original_helper_code=original_helper_code,
             function_trace_id=function_trace_id[:-4] + f"EXP{u}" if should_run_experiment else function_trace_id,
             file_path_to_helper_classes=file_path_to_helper_classes,
@@ -504,14 +494,12 @@ def determine_best_candidate(
         function_to_optimize: FunctionToOptimize,
         original_code: str,
         original_code_baseline: OriginalCodeBaseline,
-        original_code_runtime_distribution: npt.NDArray[np.float64],
         original_helper_code: dict[Path, str],
         function_trace_id: str,
         file_path_to_helper_classes: dict[Path, set[str]],
     ) -> BestOptimization | None:
         best_optimization: BestOptimization | None = None
         best_runtime_until_now = original_code_baseline.runtime
-        best_speedup_ratio_until_now = 1.0

         speedup_ratios: dict[str, float | None] = {}
         optimized_runtimes: dict[str, float | None] = {}
@@ -561,9 +549,7 @@ def determine_best_candidate(
                     is_correct[candidate.optimization_id] = False
                     speedup_ratios[candidate.optimization_id] = None
                 else:
-                    candidate_result, candidate_runtime_distribution, candidate_runtime_statistics = (
-                        run_results.unwrap()
-                    )
+                    candidate_result: OptimizedCandidateResult = run_results.unwrap()
                     best_test_runtime = candidate_result.best_test_runtime
                     optimized_runtimes[candidate.optimization_id] = best_test_runtime
                     is_correct[candidate.optimization_id] = True
@@ -572,7 +558,7 @@ def determine_best_candidate(
                     )
                     speedup_ratios[candidate.optimization_id] = perf_gain

-                    tree = Tree(f"Candidate #{candidate_index} - Sum of Minimum Runtimes")
+                    tree = Tree(f"Candidate #{candidate_index} - Runtime Information")
                     if speedup_critic(
                         candidate_result, original_code_baseline.runtime, best_runtime_until_now
                     ) and quantity_of_tests_critic(candidate_result):
@@ -605,34 +591,6 @@ def determine_best_candidate(
                     console.print(tree)
                     console.rule()

-                    if candidate_runtime_distribution.any() and candidate_runtime_statistics:
-                        speedup_stats = compare_function_runtime_distributions(
-                            original_code_runtime_distribution, candidate_runtime_distribution
-                        )
-                        tree = Tree(f"Candidate #{candidate_index} - Bayesian Bootstrapping Nonparametric Analysis")
-                        tree.add(
-                            f"Expected candidate summed runtime (95% Credible Interval) = ["
-                            f"{humanize_runtime(round(candidate_runtime_statistics['credible_interval_lower_bound']))}"
-                            f", "
-                            f"{humanize_runtime(round(candidate_runtime_statistics['credible_interval_upper_bound']))}]"
-                            f"\nMedian = {humanize_runtime(round(candidate_runtime_statistics['median']))}"
-                            f"\nSpeedup ratio of candidate vs original:"
-                            f"\n95% Credible Interval = [{speedup_stats['credible_interval_lower_bound']:.3f}X, "
-                            f"{speedup_stats['credible_interval_upper_bound']:.3f}X]"
-                            f"\nmedian = {speedup_stats['median']:.3f}X"
-                        )
-                        if speedup_stats["credible_interval_lower_bound"] > 1.0:
-                            tree.add("The candidate is faster than the original code with a 95% probability.")
-                            if speedup_stats["median"] > best_speedup_ratio_until_now:
-                                best_speedup_ratio_until_now = float(speedup_stats["median"])
-                                tree.add("This candidate is the best candidate so far.")
-                            else:
-                                tree.add("This candidate is not faster than the current fastest candidate.")
-                        else:
-                            tree.add("It is inconclusive whether the candidate is faster than the original code.")
-                        console.print(tree)
-                        console.rule()
-
                 self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
         except KeyboardInterrupt as e:
             self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
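
Note on the removal above: with the Bayesian credible-interval comparison gone, candidate ranking rests entirely on the point-estimate path that remains, performance_gain plus speedup_critic from codeflash.result.critic. A minimal sketch of the arithmetic this implies follows; the function bodies are illustrative assumptions for this note, not the actual definitions in codeflash.result.critic, and is_noticeably_faster is a hypothetical name.

def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> float:
    # Assumed definition: relative improvement over the optimized runtime.
    # Example: 1000 ns -> 800 ns gives (1000 - 800) / 800 = 0.25, i.e. 25% faster.
    return (original_runtime_ns - optimized_runtime_ns) / optimized_runtime_ns

def is_noticeably_faster(candidate_ns: int, original_ns: int, best_so_far_ns: int, noise_floor: float = 0.05) -> bool:
    # Assumed gate in the spirit of speedup_critic: accept a candidate only if it
    # beats the best runtime seen so far and its gain over the original runtime
    # exceeds a small noise threshold.
    gain = performance_gain(original_runtime_ns=original_ns, optimized_runtime_ns=candidate_ns)
    return candidate_ns < best_so_far_ns and gain > noise_floor
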
@@ -1011,7 +969,7 @@ def establish_original_code_baseline(
         function_to_optimize: FunctionToOptimize,
         original_helper_code: dict[Path, str],
         file_path_to_helper_classes: dict[Path, set[str]],
-    ) -> Result[tuple[OriginalCodeBaseline, npt.NDArray[np.float64], dict[str, np.float64], list[str]], str]:
+    ) -> Result[tuple[OriginalCodeBaseline, list[str]], str]:
         # For the original function - run the tests and get the runtime, plus coverage
         with progress_bar(f"Establishing original code baseline for {function_name}"):
             assert (test_framework := self.args.test_framework) in ["pytest", "unittest"]
@@ -1121,19 +1079,7 @@ def establish_original_code_baseline(
11211079 f"{ humanize_runtime (total_timing )} per full loop"
11221080 )
11231081 console .rule ()
1124- logger .debug (f"Total original code summed runtime (ns): { total_timing } " )
1125- console .rule ()
1126- runtime_distribution , runtime_statistics = benchmarking_results .bayesian_nonparametric_bootstrap_analysis (
1127- 100_000
1128- )
1129- logger .info (
1130- f"Bayesian Bootstrapping Nonparametric Analysis"
1131- f"\n Expected original code summed runtime (95% Credible Interval) = ["
1132- f"{ humanize_runtime (round (runtime_statistics ['credible_interval_lower_bound' ]))} , "
1133- f"{ humanize_runtime (round (runtime_statistics ['credible_interval_upper_bound' ]))} ], "
1134- f"\n median: { humanize_runtime (round (runtime_statistics ['median' ]))} "
1135- )
1136-
1082+ logger .debug (f"Total original code runtime (ns): { total_timing } " )
11371083 return Success (
11381084 (
11391085 OriginalCodeBaseline (
@@ -1142,8 +1088,6 @@ def establish_original_code_baseline(
                         runtime=total_timing,
                         coverage_results=coverage_results,
                     ),
-                    runtime_distribution,
-                    runtime_statistics,
                     functions_to_remove,
                 )
             )
@@ -1156,7 +1100,8 @@ def run_optimized_candidate(
         function_to_optimize: FunctionToOptimize,
         original_helper_code: dict[Path, str],
         file_path_to_helper_classes: dict[Path, set[str]],
-    ) -> Result[tuple[OptimizedCandidateResult, npt.NDArray[np.float64], dict[str, np.float64]], str]:
+    ) -> Result[OptimizedCandidateResult, str]:
+
         assert (test_framework := self.args.test_framework) in ["pytest", "unittest"]

         with progress_bar("Testing optimization candidate"):
@@ -1250,35 +1195,16 @@ def run_optimized_candidate(
             if (total_candidate_timing := candidate_benchmarking_results.total_passed_runtime()) == 0:
                 logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
                 console.rule()
-                runtime_distribution: npt.NDArray[np.float64] = np.array([])
-                runtime_statistics: dict[str, np.float64] = {}
-            else:
-                logger.debug(
-                    f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}"
-                )
-                console.rule()
-                runtime_distribution, runtime_statistics = (
-                    candidate_benchmarking_results.bayesian_nonparametric_bootstrap_analysis(100_000)
-                )
-                logger.debug(
-                    f"Overall code summed runtime (95% Credible Interval) = ["
-                    f"{humanize_runtime(round(runtime_statistics['credible_interval_lower_bound']))}, "
-                    f"{humanize_runtime(round(runtime_statistics['credible_interval_upper_bound']))}], median: "
-                    f"{humanize_runtime(round(runtime_statistics['median']))}"
-                )
-                console.rule()
+
+            logger.debug(f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}")
             return Success(
-                (
-                    OptimizedCandidateResult(
-                        max_loop_count=loop_count,
-                        best_test_runtime=total_candidate_timing,
-                        behavior_test_results=candidate_behavior_results,
-                        benchmarking_test_results=candidate_benchmarking_results,
-                        optimization_candidate_index=optimization_candidate_index,
-                        total_candidate_timing=total_candidate_timing,
-                    ),
-                    runtime_distribution,
-                    runtime_statistics,
+                OptimizedCandidateResult(
+                    max_loop_count=loop_count,
+                    best_test_runtime=total_candidate_timing,
+                    behavior_test_results=candidate_behavior_results,
+                    benchmarking_test_results=candidate_benchmarking_results,
+                    optimization_candidate_index=optimization_candidate_index,
+                    total_candidate_timing=total_candidate_timing,
                 )
             )

@@ -1327,13 +1253,13 @@ def run_and_parse_tests(
                 raise ValueError(f"Unexpected testing type: {testing_type}")
         except subprocess.TimeoutExpired:
             logger.exception(
-                f'Error running tests in {", ".join(str(f) for f in test_files.test_files)}.\nTimeout Error'
+                f"Error running tests in {', '.join(str(f) for f in test_files.test_files)}.\nTimeout Error"
             )
             return TestResults(), None
         if run_result.returncode != 0:
             logger.debug(
-                f'Nonzero return code {run_result.returncode} when running tests in '
-                f'{", ".join([str(f.instrumented_behavior_file_path) for f in test_files.test_files])}.\n'
+                f"Nonzero return code {run_result.returncode} when running tests in "
+                f"{', '.join([str(f.instrumented_behavior_file_path) for f in test_files.test_files])}.\n"
                 f"stdout: {run_result.stdout}\n"
                 f"stderr: {run_result.stderr}\n"
             )
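
Note on the return-type simplification: establish_original_code_baseline and run_optimized_candidate now return their payloads directly (or as a short tuple) instead of threading numpy runtime distributions and statistics dicts through every caller. Below is a self-contained sketch of the Success/Failure pattern these call sites rely on; it is a simplified stand-in for illustration, not the actual Result type in codeflash.either, and run_candidate is a hypothetical function.

from __future__ import annotations

from dataclasses import dataclass
from typing import Generic, TypeVar

T = TypeVar("T")
E = TypeVar("E")

@dataclass
class Success(Generic[T]):
    _value: T

    def unwrap(self) -> T:
        return self._value

@dataclass
class Failure(Generic[E]):
    _error: E

    def failure(self) -> E:
        return self._error

Result = Success[T] | Failure[E]  # e.g. Result[OptimizedCandidateResult, str]

def run_candidate(succeeds: bool) -> Result[str, str]:
    # Before this change the success payload was a 3-tuple:
    #   (candidate_result, runtime_distribution, runtime_statistics)
    # After it, the payload is the bare candidate result.
    return Success("candidate_result") if succeeds else Failure("behavioral tests failed")

res = run_candidate(succeeds=True)
if isinstance(res, Failure):
    print("error:", res.failure())
else:
    candidate_result = res.unwrap()  # no tuple unpacking needed anymore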