88from collections import defaultdict
99from functools import cache
1010from pathlib import Path
11- from typing import TYPE_CHECKING , Optional
11+ from typing import TYPE_CHECKING , Any , Optional
1212
1313import git
1414import libcst as cst
@@ -145,6 +145,7 @@ def qualified_name(self) -> str:
def qualified_name_with_modules_from_root(self, project_root_path: Path) -> str:
    """Return this function's qualified name prefixed with its module name.

    The module name is whatever ``module_name_from_file_path`` produces for
    ``self.file_path`` given ``project_root_path``; it is joined to
    ``self.qualified_name`` with a single dot and no surrounding whitespace.
    """
    module_name = module_name_from_file_path(self.file_path, project_root_path)
    return f"{module_name}.{self.qualified_name}"
147147
148+
148149def get_functions_to_optimize (
149150 optimize_all : str | None ,
150151 replay_test : str | None ,
@@ -154,10 +155,11 @@ def get_functions_to_optimize(
154155 ignore_paths : list [Path ],
155156 project_root : Path ,
156157 module_root : Path ,
158+ previous_checkpoint_functions : dict [str , dict [str , str ]] | None = None ,
157159) -> tuple [dict [Path , list [FunctionToOptimize ]], int ]:
158- assert sum ([ bool ( optimize_all ), bool ( replay_test ), bool ( file )]) <= 1 , (
159- "Only one of optimize_all, replay_test, or file should be provided"
160- )
160+ assert (
161+ sum ([ bool ( optimize_all ), bool ( replay_test ), bool ( file )]) <= 1
162+ ), "Only one of optimize_all, replay_test, or file should be provided"
161163 functions : dict [str , list [FunctionToOptimize ]]
162164 with warnings .catch_warnings ():
163165 warnings .simplefilter (action = "ignore" , category = SyntaxWarning )
@@ -198,7 +200,7 @@ def get_functions_to_optimize(
198200 ph ("cli-optimizing-git-diff" )
199201 functions = get_functions_within_git_diff ()
200202 filtered_modified_functions , functions_count = filter_functions (
201- functions , test_cfg .tests_root , ignore_paths , project_root , module_root
203+ functions , test_cfg .tests_root , ignore_paths , project_root , module_root , previous_checkpoint_functions
202204 )
203205 logger .info (f"Found { functions_count } function{ 's' if functions_count > 1 else '' } to optimize" )
204206 return filtered_modified_functions , functions_count
@@ -414,6 +416,7 @@ def filter_functions(
414416 ignore_paths : list [Path ],
415417 project_root : Path ,
416418 module_root : Path ,
419+ previous_checkpoint_functions : dict [Path , dict [str , Any ]] | None = None ,
417420 disable_logs : bool = False ,
418421) -> tuple [dict [Path , list [FunctionToOptimize ]], int ]:
419422 blocklist_funcs = get_blocklisted_functions ()
@@ -430,13 +433,16 @@ def filter_functions(
430433 ignore_paths_removed_count : int = 0
431434 malformed_paths_count : int = 0
432435 submodule_ignored_paths_count : int = 0
436+ blocklist_funcs_removed_count : int = 0
437+ previous_checkpoint_functions_removed_count : int = 0
433438 tests_root_str = str (tests_root )
434439 module_root_str = str (module_root )
435440 # We desperately need Python 3.10+ only support to make this code readable with structural pattern matching
436441 for file_path_path , functions in modified_functions .items ():
442+ _functions = functions
437443 file_path = str (file_path_path )
438444 if file_path .startswith (tests_root_str + os .sep ):
439- test_functions_removed_count += len (functions )
445+ test_functions_removed_count += len (_functions )
440446 continue
441447 if file_path in ignore_paths or any (
442448 file_path .startswith (str (ignore_path ) + os .sep ) for ignore_path in ignore_paths
@@ -449,27 +455,39 @@ def filter_functions(
449455 submodule_ignored_paths_count += 1
450456 continue
451457 if path_belongs_to_site_packages (Path (file_path )):
452- site_packages_removed_count += len (functions )
458+ site_packages_removed_count += len (_functions )
453459 continue
454460 if not file_path .startswith (module_root_str + os .sep ):
455- non_modules_removed_count += len (functions )
461+ non_modules_removed_count += len (_functions )
456462 continue
457463 try :
458464 ast .parse (f"import { module_name_from_file_path (Path (file_path ), project_root )} " )
459465 except SyntaxError :
460466 malformed_paths_count += 1
461467 continue
# Drop blocklisted functions. The blocklist is looked up by file name and
# then by qualified name within that file.
if blocklist_funcs:
    functions_tmp = []
    for function in _functions:
        # Only a function whose file name AND qualified name both match a
        # blocklist entry is removed; every other function must be kept.
        if (
            function.file_path.name in blocklist_funcs
            and function.qualified_name in blocklist_funcs[function.file_path.name]
        ):
            blocklist_funcs_removed_count += 1
            continue
        functions_tmp.append(function)
    _functions = functions_tmp

# Drop functions whose module-qualified name is already a key of the
# previous checkpoint mapping.
if previous_checkpoint_functions:
    functions_tmp = []
    for function in _functions:
        if function.qualified_name_with_modules_from_root(project_root) in previous_checkpoint_functions:
            previous_checkpoint_functions_removed_count += 1
            continue
        functions_tmp.append(function)
    _functions = functions_tmp

# Record whatever survived every filter for this file and add it to the total.
filtered_modified_functions[file_path] = _functions
functions_count += len(_functions)
473491
474492 if not disable_logs :
475493 log_info = {
@@ -479,6 +497,8 @@ def filter_functions(
479497 f"{ non_modules_removed_count } function{ 's' if non_modules_removed_count != 1 else '' } outside module-root" : non_modules_removed_count ,
480498 f"{ ignore_paths_removed_count } file{ 's' if ignore_paths_removed_count != 1 else '' } from ignored paths" : ignore_paths_removed_count ,
481499 f"{ submodule_ignored_paths_count } file{ 's' if submodule_ignored_paths_count != 1 else '' } from ignored submodules" : submodule_ignored_paths_count ,
500+ f"{ blocklist_funcs_removed_count } function{ 's' if blocklist_funcs_removed_count != 1 else '' } as previously optimized" : blocklist_funcs_removed_count ,
501+ f"{ previous_checkpoint_functions_removed_count } function{ 's' if previous_checkpoint_functions_removed_count != 1 else '' } skipped from checkpoint" : previous_checkpoint_functions_removed_count ,
482502 }
483503 log_string = "\n " .join ([k for k , v in log_info .items () if v > 0 ])
484504 if log_string :
0 commit comments