2222 module_name_from_file_path ,
2323 path_belongs_to_site_packages ,
2424)
25+ from codeflash .code_utils .config_consts import REPEAT_OPTIMIZATION_PROBABILITY
2526from codeflash .code_utils .env_utils import get_pr_number
2627from codeflash .code_utils .git_utils import get_git_diff , get_repo_owner_and_name
2728from codeflash .code_utils .time_utils import humanize_runtime
2829from codeflash .discovery .discover_unit_tests import discover_unit_tests
2930from codeflash .models .models import FunctionParent
3031from codeflash .telemetry .posthog_cf import ph
31- from codeflash .code_utils .config_consts import REPEAT_OPTIMIZATION_PROBABILITY
3232
3333if TYPE_CHECKING :
3434 from libcst import CSTNode
@@ -460,9 +460,7 @@ def inspect_top_level_functions_or_methods(
460460 )
461461
462462
463- def check_optimization_status (
464- functions_by_file : dict [Path , list [FunctionToOptimize ]], owner : str , repo : str , pr_number : int
465- ) -> tuple [dict [Path , list [FunctionToOptimize ]], int ]:
463+ def check_optimization_status (functions_by_file : dict [Path , list [FunctionToOptimize ]]) -> list [tuple [str , str ]]:
466464 """Check which functions have already been optimized and filter them out.
467465
468466 This function calls the optimization API to:
@@ -480,7 +478,19 @@ def check_optimization_status(
480478 List of (str, str) pairs taken from the API's "already_optimized_paths" field; empty when repository/PR info is unavailable or the API call fails
481479
482480 """
483- # Build the code_contexts dictionary for the API call
481+ # Resolve the repository owner/name and PR number; the status check is skipped when any of them is missing
482+ # already_optimized_count = 0
483+ try :
484+ repository = git .Repo (search_parent_directories = True )
485+ owner , repo = get_repo_owner_and_name (repository )
486+ except git .exc .InvalidGitRepositoryError :
487+ logger .warning ("No git repository found" )
488+ owner , repo = None , None
489+ pr_number = get_pr_number ()
490+
491+ if not owner or not repo or pr_number is None :
492+ return []
493+
484494 code_contexts = {}
485495 path_to_function_map = {}
486496
@@ -497,29 +507,13 @@ def check_optimization_status(
497507
498508 try :
499509 result = is_function_being_optimized_again (owner , repo , pr_number , code_contexts )
500- already_optimized_paths = set (result .get ("already_optimized_paths" , []))
501-
502- # Filter out already optimized functions
503- filtered_functions = defaultdict (list )
504- remaining_count = 0
505-
506- for path_key , (file_path , func ) in path_to_function_map .items ():
507- if path_key not in already_optimized_paths :
508- filtered_functions [file_path ].append (func )
509- remaining_count += 1
510- else :
511- if random .random () < REPEAT_OPTIMIZATION_PROBABILITY :
512- logger .info (f"Attempting more optimization on { path_key } " )
513- filtered_functions [file_path ].append (func )
514- remaining_count += 1
515-
516- return dict (filtered_functions ), remaining_count
510+ already_optimized_paths : list [tuple [str , str ]] = result .get ("already_optimized_paths" , [])
511+ return already_optimized_paths
517512
518513 except Exception as e :
519514 logger .warning (f"Failed to check optimization status: { e } " )
520515 # Report nothing as already optimized if the API call fails, so no function is filtered out
521- total_count = sum (len (funcs ) for funcs in functions_by_file .values ())
522- return functions_by_file , total_count
516+ return []
523517
524518
525519def filter_functions (
@@ -531,33 +525,25 @@ def filter_functions(
531525 previous_checkpoint_functions : dict [Path , dict [str , Any ]] | None = None ,
532526 disable_logs : bool = False , # noqa: FBT001, FBT002
533527) -> tuple [dict [Path , list [FunctionToOptimize ]], int ]:
528+ filtered_modified_functions : dict [str , list [FunctionToOptimize ]] = {}
534529 blocklist_funcs = get_blocklisted_functions ()
535- already_optimized_count = 0
536- path_based_functions = {Path (k ): v for k , v in modified_functions .items () if v }
537- try :
538- repository = git .Repo (Path .cwd (), search_parent_directories = True )
539- owner , repo = get_repo_owner_and_name (repository )
540- except git .exc .InvalidGitRepositoryError :
541- logger .warning ("No git repository found" )
542- owner , repo = None , None
543- pr_number = get_pr_number ()
544- if owner and repo and pr_number is not None :
545- path_based_functions , functions_count = check_optimization_status (path_based_functions , owner , repo , pr_number )
546530 logger .debug (f"Blocklisted functions: { blocklist_funcs } " )
547531 # Remove any function that we don't want to optimize
532+ already_optimized_paths = check_optimization_status (modified_functions )
548533
549534 # Ignore files with submodule path, cache the submodule paths
550535 submodule_paths = ignored_submodule_paths (module_root )
551536
552- filtered_modified_functions : dict [str , list [FunctionToOptimize ]] = {}
553537 functions_count : int = 0
554538 test_functions_removed_count : int = 0
555539 non_modules_removed_count : int = 0
556540 site_packages_removed_count : int = 0
557541 ignore_paths_removed_count : int = 0
558542 malformed_paths_count : int = 0
543+ already_optimized_count : int = 0
559544 submodule_ignored_paths_count : int = 0
560545 blocklist_funcs_removed_count : int = 0
546+ already_optimized_paths_removed_count : int = 0
561547 previous_checkpoint_functions_removed_count : int = 0
562548 tests_root_str = str (tests_root )
563549 module_root_str = str (module_root )
@@ -590,6 +576,7 @@ def filter_functions(
590576 except SyntaxError :
591577 malformed_paths_count += 1
592578 continue
579+
593580 if blocklist_funcs :
594581 functions_tmp = []
595582 for function in _functions :
@@ -603,6 +590,17 @@ def filter_functions(
603590 # This function is NOT in blocklist. we can keep it
604591 functions_tmp .append (function )
605592 _functions = functions_tmp
593+ functions_tmp = []
594+ for function in _functions :
595+ if (
596+ function .file_path .name ,
597+ function .qualified_name ,
598+ ) in already_optimized_paths and random .random () > REPEAT_OPTIMIZATION_PROBABILITY :
599+ # This function was already optimized; skip it unless the random draw selects it for a repeat attempt
600+ already_optimized_paths_removed_count += 1
601+ continue
602+ functions_tmp .append (function )
603+ _functions = functions_tmp
606604
607605 if previous_checkpoint_functions :
608606 functions_tmp = []
@@ -616,14 +614,6 @@ def filter_functions(
616614 filtered_modified_functions [file_path ] = _functions
617615 functions_count += len (_functions )
618616
619- # Convert to Path keys for optimization check
620-
621-
622- # Check optimization status if repository info is provided
623-
624- initial_count = sum (len (funcs ) for funcs in filtered_modified_functions .values ())
625- already_optimized_count = initial_count - functions_count
626-
627617 if not disable_logs :
628618 log_info = {
629619 f"{ test_functions_removed_count } test function{ 's' if test_functions_removed_count != 1 else '' } " : test_functions_removed_count ,
@@ -641,7 +631,7 @@ def filter_functions(
641631 logger .info (f"Ignoring: { log_string } " )
642632 console .rule ()
643633
644- return path_based_functions , functions_count
634+ return { Path ( k ): v for k , v in filtered_modified_functions . items () if v } , functions_count
645635
646636
647637def filter_files_optimized (file_path : Path , tests_root : Path , ignore_paths : list [Path ], module_root : Path ) -> bool :
0 commit comments