11from __future__ import annotations
22
33import ast
4+ import hashlib
45import os
56import random
67import warnings
@@ -145,15 +146,56 @@ def qualified_name(self) -> str:
def qualified_name_with_modules_from_root(self, project_root_path: Path) -> str:
    """Return this function's qualified name prefixed with its module path.

    The prefix is whatever ``module_name_from_file_path`` returns for
    ``self.file_path`` and ``project_root_path``; it is joined to
    ``self.qualified_name`` with a single dot.
    """
    module_name = module_name_from_file_path(self.file_path, project_root_path)
    return f"{module_name}.{self.qualified_name}"
147148
def get_code_context_hash(self) -> str:
    """Return a SHA-256 hex digest identifying this function's current code.

    The digest is computed over three parts joined by a ``---`` separator
    line: the base name of ``self.file_path``, ``self.qualified_name``, and
    the stripped source text of the function. The source text is the slice
    ``starting_line..ending_line`` (1-indexed, inclusive) of the file; when
    either bound is ``None`` the whole file content is used instead.

    If the file cannot be read or is not valid UTF-8, a warning is logged
    and the digest is computed from ``"<file name>:<qualified name>"`` only.

    NOTE(review): only the file's base name is hashed, so identical
    functions in same-named files under different directories produce the
    same digest. Confirm this is intended by the tracking service.
    """
    # Keep the try body to the single statement that can actually fail.
    try:
        file_content = self.file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # listed explicitly or a non-UTF-8 source file would crash the caller.
        logger.warning(f"Could not read file {self.file_path} for hashing: {e}")
        fallback_string = f"{self.file_path.name}:{self.qualified_name}"
        return hashlib.sha256(fallback_string.encode("utf-8")).hexdigest()

    if self.starting_line is not None and self.ending_line is not None:
        lines = file_content.splitlines()
        # Convert the 1-indexed start to a 0-indexed slice bound; clamp so a
        # stray 0 cannot turn into a negative (from-the-end) index.
        first = max(self.starting_line - 1, 0)
        function_content = "\n".join(lines[first : self.ending_line])
    else:
        # Without line numbers the whole file stands in for the function.
        function_content = file_content

    context_parts = [
        str(self.file_path.name),  # base name only, so the hash is checkout-independent
        self.qualified_name,
        function_content.strip(),
    ]
    context_string = "\n---\n".join(context_parts)
    return hashlib.sha256(context_string.encode("utf-8")).hexdigest()
188+
189+
148190def get_functions_to_optimize (
149- optimize_all : str | None ,
150- replay_test : str | None ,
151- file : Path | None ,
152- only_get_this_function : str | None ,
153- test_cfg : TestConfig ,
154- ignore_paths : list [Path ],
155- project_root : Path ,
156- module_root : Path ,
191+ optimize_all : str | None ,
192+ replay_test : str | None ,
193+ file : Path | None ,
194+ only_get_this_function : str | None ,
195+ test_cfg : TestConfig ,
196+ ignore_paths : list [Path ],
197+ project_root : Path ,
198+ module_root : Path ,
157199) -> tuple [dict [Path , list [FunctionToOptimize ]], int ]:
158200 assert sum ([bool (optimize_all ), bool (replay_test ), bool (file )]) <= 1 , (
159201 "Only one of optimize_all, replay_test, or file should be provided"
@@ -186,7 +228,7 @@ def get_functions_to_optimize(
186228 found_function = None
187229 for fn in functions .get (file , []):
188230 if only_function_name == fn .function_name and (
189- class_name is None or class_name == fn .top_level_parent_name
231+ class_name is None or class_name == fn .top_level_parent_name
190232 ):
191233 found_function = fn
192234 if found_function is None :
@@ -224,8 +266,8 @@ def get_functions_within_git_diff() -> dict[str, list[FunctionToOptimize]]:
224266 function_to_optimize
225267 for function_to_optimize in function_lines .functions
226268 if (start_line := function_to_optimize .starting_line ) is not None
227- and (end_line := function_to_optimize .ending_line ) is not None
228- and any (start_line <= line <= end_line for line in modified_lines [path_str ])
269+ and (end_line := function_to_optimize .ending_line ) is not None
270+ and any (start_line <= line <= end_line for line in modified_lines [path_str ])
229271 ]
230272 return modified_functions
231273
@@ -258,7 +300,7 @@ def find_all_functions_in_file(file_path: Path) -> dict[Path, list[FunctionToOpt
258300
259301
260302def get_all_replay_test_functions (
261- replay_test : Path , test_cfg : TestConfig , project_root_path : Path
303+ replay_test : Path , test_cfg : TestConfig , project_root_path : Path
262304) -> dict [Path , list [FunctionToOptimize ]]:
263305 function_tests = discover_unit_tests (test_cfg , discover_only_these_tests = [replay_test ])
264306 # Get the absolute file paths for each function, excluding class name if present
@@ -273,7 +315,7 @@ def get_all_replay_test_functions(
273315 class_name = (
274316 module_path_parts [- 1 ]
275317 if module_path_parts
276- and is_class_defined_in_file (
318+ and is_class_defined_in_file (
277319 module_path_parts [- 1 ], Path (project_root_path , * module_path_parts [:- 1 ]).with_suffix (".py" )
278320 )
279321 else None
@@ -323,7 +365,8 @@ def ignored_submodule_paths(module_root: str) -> list[str]:
323365
324366class TopLevelFunctionOrMethodVisitor (ast .NodeVisitor ):
325367 def __init__ (
326- self , file_name : Path , function_or_method_name : str , class_name : str | None = None , line_no : int | None = None
368+ self , file_name : Path , function_or_method_name : str , class_name : str | None = None ,
369+ line_no : int | None = None
327370 ) -> None :
328371 self .file_name = file_name
329372 self .class_name = class_name
@@ -354,13 +397,13 @@ def visit_ClassDef(self, node: ast.ClassDef) -> None:
354397 if isinstance (body_node , ast .FunctionDef ) and body_node .name == self .function_name :
355398 self .is_top_level = True
356399 if any (
357- isinstance (decorator , ast .Name ) and decorator .id == "classmethod"
358- for decorator in body_node .decorator_list
400+ isinstance (decorator , ast .Name ) and decorator .id == "classmethod"
401+ for decorator in body_node .decorator_list
359402 ):
360403 self .is_classmethod = True
361404 elif any (
362- isinstance (decorator , ast .Name ) and decorator .id == "staticmethod"
363- for decorator in body_node .decorator_list
405+ isinstance (decorator , ast .Name ) and decorator .id == "staticmethod"
406+ for decorator in body_node .decorator_list
364407 ):
365408 self .is_staticmethod = True
366409 return
@@ -369,13 +412,13 @@ def visit_ClassDef(self, node: ast.ClassDef) -> None:
369412 # This way, if we don't have the class name, we can still find the static method
370413 for body_node in node .body :
371414 if (
372- isinstance (body_node , ast .FunctionDef )
373- and body_node .name == self .function_name
374- and body_node .lineno in {self .line_no , self .line_no + 1 }
375- and any (
376- isinstance (decorator , ast .Name ) and decorator .id == "staticmethod"
377- for decorator in body_node .decorator_list
378- )
415+ isinstance (body_node , ast .FunctionDef )
416+ and body_node .name == self .function_name
417+ and body_node .lineno in {self .line_no , self .line_no + 1 }
418+ and any (
419+ isinstance (decorator , ast .Name ) and decorator .id == "staticmethod"
420+ for decorator in body_node .decorator_list
421+ )
379422 ):
380423 self .is_staticmethod = True
381424 self .is_top_level = True
@@ -386,7 +429,7 @@ def visit_ClassDef(self, node: ast.ClassDef) -> None:
386429
387430
388431def inspect_top_level_functions_or_methods (
389- file_name : Path , function_or_method_name : str , class_name : str | None = None , line_no : int | None = None
432+ file_name : Path , function_or_method_name : str , class_name : str | None = None , line_no : int | None = None
390433) -> FunctionProperties :
391434 with open (file_name , encoding = "utf8" ) as file :
392435 try :
@@ -408,13 +451,93 @@ def inspect_top_level_functions_or_methods(
408451 )
409452
410453
def check_optimization_status(
    functions_by_file: dict[Path, list[FunctionToOptimize]],
    owner: str,
    repo: str,
    pr_number: int,
    *,
    api_url: str = "http://your-api-endpoint/is_code_being_optimized_again",
    timeout: float = 30,
) -> tuple[dict[Path, list[FunctionToOptimize]], int]:
    """Drop functions the optimization service reports as already optimized.

    Each function is identified by the key ``"<file_path>:<qualified_name>"``
    and sent together with its ``get_code_context_hash()`` digest in a single
    POST request. Keys listed under ``already_optimized_paths`` in the JSON
    response are removed from the result.

    The check is best-effort: if the request fails, the response is not
    valid JSON, or the payload does not have the expected shape, a warning
    is logged and every input function is returned unchanged.

    Args:
        functions_by_file: Mapping of file path to the functions found in it.
        owner: Repository owner sent to the service.
        repo: Repository name sent to the service.
        pr_number: Pull request number; sent as a string.
        api_url: Endpoint to POST to. The default is a placeholder.
        timeout: Request timeout in seconds.

    Returns:
        ``(filtered_functions, remaining_count)``. On success the mapping
        holds only files that still have at least one function left. An
        input with no functions at all yields ``({}, 0)``.
    """
    # TODO(review): the default api_url is a placeholder and uses plain HTTP;
    # supply the real HTTPS endpoint before relying on this check.
    code_contexts: dict[str, str] = {}
    for file_path, functions in functions_by_file.items():
        for func in functions:
            code_contexts[f"{file_path}:{func.qualified_name}"] = func.get_code_context_hash()

    if not code_contexts:
        return {}, 0

    # Imported only once there is something to send, so the empty case does
    # not depend on the HTTP client being importable.
    import requests

    def keep_all(reason: object) -> tuple[dict[Path, list[FunctionToOptimize]], int]:
        # Best-effort fallback: hand back every function untouched.
        logger.warning(f"Failed to check optimization status: {reason}")
        logger.info("Proceeding with all functions (optimization check failed)")
        total_count = sum(len(funcs) for funcs in functions_by_file.values())
        return functions_by_file, total_count

    try:
        response = requests.post(
            api_url,
            json={
                "owner": owner,
                "repo": repo,
                "pr_number": str(pr_number),
                "code_contexts": code_contexts,
            },
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        return keep_all(e)

    reported = result.get("already_optimized_paths", []) if isinstance(result, dict) else None
    if not isinstance(reported, (list, tuple)):
        return keep_all("unexpected response payload")
    already_optimized_paths = {path for path in reported if isinstance(path, str)}

    logger.info(f"Found {len(already_optimized_paths)} already optimized functions")

    # Filter by walking the input rather than a key -> function map, so that
    # functions sharing a key are kept or dropped together instead of one
    # silently overwriting the other.
    filtered_functions: dict[Path, list[FunctionToOptimize]] = {}
    remaining_count = 0
    for file_path, functions in functions_by_file.items():
        kept = [
            func
            for func in functions
            if f"{file_path}:{func.qualified_name}" not in already_optimized_paths
        ]
        if kept:
            filtered_functions[file_path] = kept
            remaining_count += len(kept)

    return filtered_functions, remaining_count
529+
530+
411531def filter_functions (
412- modified_functions : dict [Path , list [FunctionToOptimize ]],
413- tests_root : Path ,
414- ignore_paths : list [Path ],
415- project_root : Path ,
416- module_root : Path ,
417- disable_logs : bool = False ,
532+ modified_functions : dict [Path , list [FunctionToOptimize ]],
533+ tests_root : Path ,
534+ ignore_paths : list [Path ],
535+ project_root : Path ,
536+ module_root : Path ,
537+ disable_logs : bool = False ,
538+ owner : str | None = None ,
539+ repo : str | None = None ,
540+ pr_number : int | None = None ,
418541) -> tuple [dict [Path , list [FunctionToOptimize ]], int ]:
419542 blocklist_funcs = get_blocklisted_functions ()
420543 # Remove any function that we don't want to optimize
@@ -432,19 +555,20 @@ def filter_functions(
432555 submodule_ignored_paths_count : int = 0
433556 tests_root_str = str (tests_root )
434557 module_root_str = str (module_root )
558+
435559 # We desperately need Python 3.10+ only support to make this code readable with structural pattern matching
436560 for file_path_path , functions in modified_functions .items ():
437561 file_path = str (file_path_path )
438562 if file_path .startswith (tests_root_str + os .sep ):
439563 test_functions_removed_count += len (functions )
440564 continue
441565 if file_path in ignore_paths or any (
442- file_path .startswith (str (ignore_path ) + os .sep ) for ignore_path in ignore_paths
566+ file_path .startswith (str (ignore_path ) + os .sep ) for ignore_path in ignore_paths
443567 ):
444568 ignore_paths_removed_count += 1
445569 continue
446570 if file_path in submodule_paths or any (
447- file_path .startswith (str (submodule_path ) + os .sep ) for submodule_path in submodule_paths
571+ file_path .startswith (str (submodule_path ) + os .sep ) for submodule_path in submodule_paths
448572 ):
449573 submodule_ignored_paths_count += 1
450574 continue
@@ -464,13 +588,25 @@ def filter_functions(
464588 function
465589 for function in functions
466590 if not (
467- function .file_path .name in blocklist_funcs
468- and function .qualified_name in blocklist_funcs [function .file_path .name ]
591+ function .file_path .name in blocklist_funcs
592+ and function .qualified_name in blocklist_funcs [function .file_path .name ]
469593 )
470594 ]
471595 filtered_modified_functions [file_path ] = functions
472596 functions_count += len (functions )
473597
598+ # Convert to Path keys for optimization check
599+ path_based_functions = {Path (k ): v for k , v in filtered_modified_functions .items () if v }
600+
601+ # Check optimization status if repository info is provided
602+ already_optimized_count = 0
603+ if owner and repo and pr_number is not None :
604+ path_based_functions , functions_count = check_optimization_status (
605+ path_based_functions , owner , repo , pr_number
606+ )
607+ initial_count = sum (len (funcs ) for funcs in filtered_modified_functions .values ())
608+ already_optimized_count = initial_count - functions_count
609+
474610 if not disable_logs :
475611 log_info = {
476612 f"{ test_functions_removed_count } test function{ 's' if test_functions_removed_count != 1 else '' } " : test_functions_removed_count ,
@@ -479,13 +615,14 @@ def filter_functions(
479615 f"{ non_modules_removed_count } function{ 's' if non_modules_removed_count != 1 else '' } outside module-root" : non_modules_removed_count ,
480616 f"{ ignore_paths_removed_count } file{ 's' if ignore_paths_removed_count != 1 else '' } from ignored paths" : ignore_paths_removed_count ,
481617 f"{ submodule_ignored_paths_count } file{ 's' if submodule_ignored_paths_count != 1 else '' } from ignored submodules" : submodule_ignored_paths_count ,
618+ f"{ already_optimized_count } already optimized function{ 's' if already_optimized_count != 1 else '' } " : already_optimized_count ,
482619 }
483620 log_string = "\n " .join ([k for k , v in log_info .items () if v > 0 ])
484621 if log_string :
485622 logger .info (f"Ignoring: { log_string } " )
486623 console .rule ()
487624
488- return { Path ( k ): v for k , v in filtered_modified_functions . items () if v } , functions_count
625+ return path_based_functions , functions_count
489626
490627
491628def filter_files_optimized (file_path : Path , tests_root : Path , ignore_paths : list [Path ], module_root : Path ) -> bool :
@@ -505,8 +642,8 @@ def filter_files_optimized(file_path: Path, tests_root: Path, ignore_paths: list
505642 if submodule_paths is None :
506643 submodule_paths = ignored_submodule_paths (module_root )
507644 return not (
508- file_path in submodule_paths
509- or any (file_path .is_relative_to (submodule_path ) for submodule_path in submodule_paths )
645+ file_path in submodule_paths
646+ or any (file_path .is_relative_to (submodule_path ) for submodule_path in submodule_paths )
510647 )
511648
512649
@@ -515,4 +652,4 @@ def function_has_return_statement(function_node: FunctionDef | AsyncFunctionDef)
515652
516653
def function_is_a_property(function_node: FunctionDef | AsyncFunctionDef) -> bool:
    """Return True when *function_node* has a bare ``@property`` decorator.

    Only a decorator written as the plain name ``property`` matches.
    Attribute forms (``@builtins.property``, ``@x.setter``) and call forms
    are ``ast.Attribute`` / ``ast.Call`` nodes and are not recognised.
    """
    return any(isinstance(node, ast.Name) and node.id == "property" for node in function_node.decorator_list)
0 commit comments