2222 module_name_from_file_path ,
2323 path_belongs_to_site_packages ,
2424)
25+ from codeflash .code_utils .config_consts import REPEAT_OPTIMIZATION_PROBABILITY
2526from codeflash .code_utils .env_utils import get_pr_number
2627from codeflash .code_utils .git_utils import get_git_diff , get_repo_owner_and_name
2728from codeflash .code_utils .time_utils import humanize_runtime
2829from codeflash .discovery .discover_unit_tests import discover_unit_tests
2930from codeflash .models .models import FunctionParent
3031from codeflash .telemetry .posthog_cf import ph
31- from codeflash .code_utils .config_consts import REPEAT_OPTIMIZATION_PROBABILITY
3232
3333if TYPE_CHECKING :
3434 from libcst import CSTNode
@@ -460,9 +460,7 @@ def inspect_top_level_functions_or_methods(
460460 )
461461
462462
463- def check_optimization_status (
464- functions_by_file : dict [Path , list [FunctionToOptimize ]], owner : str , repo : str , pr_number : int
465- ) -> tuple [dict [Path , list [FunctionToOptimize ]], int ]:
463+ def check_optimization_status (functions_by_file : dict [Path , list [FunctionToOptimize ]]) -> list [tuple [str , str ]]:
466464 """Check which functions have already been optimized so the caller can filter them out.
467465
468466 This function calls the optimization API to:
@@ -480,7 +478,19 @@ def check_optimization_status(
480478 List of (file name, qualified name) pairs reported as already optimized; empty when the check is skipped or the API call fails
481479
482480 """
483- # Build the code_contexts dictionary for the API call
481+ # Resolve the repository owner/name and PR number; the status check is skipped unless all three are available
482+ # already_optimized_count = 0
483+ try :
484+ repository = git .Repo (search_parent_directories = True )
485+ owner , repo = get_repo_owner_and_name (repository )
486+ except git .exc .InvalidGitRepositoryError :
487+ logger .warning ("No git repository found" )
488+ owner , repo = None , None
489+ pr_number = get_pr_number ()
490+
491+ if not owner or not repo or pr_number is None :
492+ return []
493+
484494 code_contexts = {}
485495 path_to_function_map = {}
486496
@@ -497,29 +507,13 @@ def check_optimization_status(
497507
498508 try :
499509 result = is_function_being_optimized_again (owner , repo , pr_number , code_contexts )
500- already_optimized_paths = set (result .get ("already_optimized_paths" , []))
501-
502- # Filter out already optimized functions
503- filtered_functions = defaultdict (list )
504- remaining_count = 0
505-
506- for path_key , (file_path , func ) in path_to_function_map .items ():
507- if path_key not in already_optimized_paths :
508- filtered_functions [file_path ].append (func )
509- remaining_count += 1
510- else :
511- if random .random () < REPEAT_OPTIMIZATION_PROBABILITY :
512- logger .info (f"Attempting more optimization on { path_key } " )
513- filtered_functions [file_path ].append (func )
514- remaining_count += 1
515-
516- return dict (filtered_functions ), remaining_count
510+ already_optimized_paths : list [tuple [str , str ]] = result .get ("already_optimized_paths" , [])
511+ return already_optimized_paths
517512
518513 except Exception as e :
519514 logger .warning (f"Failed to check optimization status: { e } " )
520515 # Return an empty list if the API call fails, so no function is treated as already optimized
521- total_count = sum (len (funcs ) for funcs in functions_by_file .values ())
522- return functions_by_file , total_count
516+ return []
523517
524518
525519def filter_functions (
@@ -531,22 +525,25 @@ def filter_functions(
531525 previous_checkpoint_functions : dict [Path , dict [str , Any ]] | None = None ,
532526 disable_logs : bool = False , # noqa: FBT001, FBT002
533527) -> tuple [dict [Path , list [FunctionToOptimize ]], int ]:
528+ filtered_modified_functions : dict [str , list [FunctionToOptimize ]] = {}
534529 blocklist_funcs = get_blocklisted_functions ()
535530 logger .debug (f"Blocklisted functions: { blocklist_funcs } " )
536531 # Remove any function that we don't want to optimize
532+ already_optimized_paths = check_optimization_status (modified_functions )
537533
538534 # Ignore files with submodule path, cache the submodule paths
539535 submodule_paths = ignored_submodule_paths (module_root )
540536
541- filtered_modified_functions : dict [str , list [FunctionToOptimize ]] = {}
542537 functions_count : int = 0
543538 test_functions_removed_count : int = 0
544539 non_modules_removed_count : int = 0
545540 site_packages_removed_count : int = 0
546541 ignore_paths_removed_count : int = 0
547542 malformed_paths_count : int = 0
543+ already_optimized_count : int = 0
548544 submodule_ignored_paths_count : int = 0
549545 blocklist_funcs_removed_count : int = 0
546+ already_optimized_paths_removed_count : int = 0
550547 previous_checkpoint_functions_removed_count : int = 0
551548 tests_root_str = str (tests_root )
552549 module_root_str = str (module_root )
@@ -579,6 +576,7 @@ def filter_functions(
579576 except SyntaxError :
580577 malformed_paths_count += 1
581578 continue
579+
582580 if blocklist_funcs :
583581 functions_tmp = []
584582 for function in _functions :
@@ -592,6 +590,17 @@ def filter_functions(
592590 # This function is NOT in blocklist. we can keep it
593591 functions_tmp .append (function )
594592 _functions = functions_tmp
593+ functions_tmp = []
594+ for function in _functions :
595+ if (
596+ function .file_path .name ,
597+ function .qualified_name ,
598+ ) in already_optimized_paths and random .random () > REPEAT_OPTIMIZATION_PROBABILITY :
599+ # This function was already optimized; skip it unless the random draw allows a repeat attempt
600+ already_optimized_paths_removed_count += 1
601+ continue
602+ functions_tmp .append (function )
603+ _functions = functions_tmp
595604
596605 if previous_checkpoint_functions :
597606 functions_tmp = []
@@ -605,23 +614,6 @@ def filter_functions(
605614 filtered_modified_functions [file_path ] = _functions
606615 functions_count += len (_functions )
607616
608- # Convert to Path keys for optimization check
609- path_based_functions = {Path (k ): v for k , v in filtered_modified_functions .items () if v }
610-
611- # Check optimization status if repository info is provided
612- already_optimized_count = 0
613- try :
614- repository = git .Repo (Path .cwd (), search_parent_directories = True )
615- owner , repo = get_repo_owner_and_name (repository )
616- except git .exc .InvalidGitRepositoryError :
617- logger .warning ("No git repository found" )
618- owner , repo = None , None
619- pr_number = get_pr_number ()
620- if owner and repo and pr_number is not None :
621- path_based_functions , functions_count = check_optimization_status (path_based_functions , owner , repo , pr_number )
622- initial_count = sum (len (funcs ) for funcs in filtered_modified_functions .values ())
623- already_optimized_count = initial_count - functions_count
624-
625617 if not disable_logs :
626618 log_info = {
627619 f"{ test_functions_removed_count } test function{ 's' if test_functions_removed_count != 1 else '' } " : test_functions_removed_count ,
@@ -639,7 +631,7 @@ def filter_functions(
639631 logger .info (f"Ignoring: { log_string } " )
640632 console .rule ()
641633
642- return path_based_functions , functions_count
634+ return { Path ( k ): v for k , v in filtered_modified_functions . items () if v } , functions_count
643635
644636
645637def filter_files_optimized (file_path : Path , tests_root : Path , ignore_paths : list [Path ], module_root : Path ) -> bool :
0 commit comments