88from _ast import AsyncFunctionDef , ClassDef , FunctionDef
99from collections import defaultdict
1010from functools import cache
11+ from itertools import islice
1112from pathlib import Path
1213from typing import TYPE_CHECKING , Any , Optional
1314
1415import git
1516import libcst as cst
1617from pydantic .dataclasses import dataclass
1718
18- from codeflash .api .cfapi import get_blocklisted_functions , make_cfapi_request , is_function_being_optimized_again
19+ from codeflash .api .cfapi import get_blocklisted_functions , is_function_being_optimized_again
1920from codeflash .cli_cmds .console import DEBUG_MODE , console , logger
2021from codeflash .code_utils .code_utils import (
2122 is_class_defined_in_file ,
@@ -153,38 +154,37 @@ def get_code_context_hash(self) -> str:
153154 to uniquely identify the function for optimization tracking.
154155 """
155156 try :
156- with open (self .file_path , 'r' , encoding = 'utf-8' ) as f :
157- file_content = f .read ()
158-
159- # Extract the function's code content
160- lines = file_content .splitlines ()
157+ # Read only the necessary lines if possible, otherwise fallback to full file.
161158 if self .starting_line is not None and self .ending_line is not None :
162- # Use line numbers if available (1-indexed to 0-indexed)
163- function_content = '\n ' .join (lines [self .starting_line - 1 :self .ending_line ])
159+ # Efficiently read only relevant function lines
160+ start = self .starting_line - 1 # convert to 0-indexed
161+ end = self .ending_line # exclusive
162+ with open (self .file_path , encoding = "utf-8" ) as f :
163+ function_lines = list (islice (f , start , end ))
164+ function_content = "" .join (function_lines ).strip ()
164165 else :
165166 # Fallback: use the entire file content if line numbers aren't available
166- function_content = file_content
167+ with open (self .file_path , encoding = "utf-8" ) as f :
168+ function_content = f .read ().strip ()
167169
168- # Create a context string that includes:
169- # - File path (relative to make it portable)
170- # - Qualified function name
171- # - Function code content
170+ # Create a context string that includes filename (for portability),
171+ # qualified function name, and function code content.
172172 context_parts = [
173173 str (self .file_path .name ), # Just filename for portability
174174 self .qualified_name ,
175- function_content . strip ()
175+ function_content ,
176176 ]
177-
178- context_string = '\n ---\n ' .join (context_parts )
177+ context_string = "\n ---\n " .join (context_parts )
179178
180179 # Generate SHA-256 hash
181- return hashlib .sha256 (context_string .encode (' utf-8' )).hexdigest ()
180+ return hashlib .sha256 (context_string .encode (" utf-8" )).hexdigest ()
182181
183- except ( OSError , IOError ) as e :
182+ except OSError as e :
184183 logger .warning (f"Could not read file { self .file_path } for hashing: { e } " )
185184 # Fallback hash using available metadata
186185 fallback_string = f"{ self .file_path .name } :{ self .qualified_name } "
187- return hashlib .sha256 (fallback_string .encode ('utf-8' )).hexdigest ()
186+ return hashlib .sha256 (fallback_string .encode ("utf-8" )).hexdigest ()
187+
188188
189189def get_functions_to_optimize (
190190 optimize_all : str | None ,
@@ -228,7 +228,7 @@ def get_functions_to_optimize(
228228 found_function = None
229229 for fn in functions .get (file , []):
230230 if only_function_name == fn .function_name and (
231- class_name is None or class_name == fn .top_level_parent_name
231+ class_name is None or class_name == fn .top_level_parent_name
232232 ):
233233 found_function = fn
234234 if found_function is None :
@@ -307,7 +307,7 @@ def find_all_functions_in_file(file_path: Path) -> dict[Path, list[FunctionToOpt
307307
308308
309309def get_all_replay_test_functions (
310- replay_test : Path , test_cfg : TestConfig , project_root_path : Path
310+ replay_test : Path , test_cfg : TestConfig , project_root_path : Path
311311) -> dict [Path , list [FunctionToOptimize ]]:
312312 function_tests = discover_unit_tests (test_cfg , discover_only_these_tests = [replay_test ])
313313 # Get the absolute file paths for each function, excluding class name if present
@@ -322,7 +322,7 @@ def get_all_replay_test_functions(
322322 class_name = (
323323 module_path_parts [- 1 ]
324324 if module_path_parts
325- and is_class_defined_in_file (
325+ and is_class_defined_in_file (
326326 module_path_parts [- 1 ], Path (project_root_path , * module_path_parts [:- 1 ]).with_suffix (".py" )
327327 )
328328 else None
@@ -374,8 +374,7 @@ def ignored_submodule_paths(module_root: str) -> list[str]:
374374
375375class TopLevelFunctionOrMethodVisitor (ast .NodeVisitor ):
376376 def __init__ (
377- self , file_name : Path , function_or_method_name : str , class_name : str | None = None ,
378- line_no : int | None = None
377+ self , file_name : Path , function_or_method_name : str , class_name : str | None = None , line_no : int | None = None
379378 ) -> None :
380379 self .file_name = file_name
381380 self .class_name = class_name
@@ -406,13 +405,13 @@ def visit_ClassDef(self, node: ast.ClassDef) -> None:
406405 if isinstance (body_node , ast .FunctionDef ) and body_node .name == self .function_name :
407406 self .is_top_level = True
408407 if any (
409- isinstance (decorator , ast .Name ) and decorator .id == "classmethod"
410- for decorator in body_node .decorator_list
408+ isinstance (decorator , ast .Name ) and decorator .id == "classmethod"
409+ for decorator in body_node .decorator_list
411410 ):
412411 self .is_classmethod = True
413412 elif any (
414- isinstance (decorator , ast .Name ) and decorator .id == "staticmethod"
415- for decorator in body_node .decorator_list
413+ isinstance (decorator , ast .Name ) and decorator .id == "staticmethod"
414+ for decorator in body_node .decorator_list
416415 ):
417416 self .is_staticmethod = True
418417 return
@@ -421,13 +420,13 @@ def visit_ClassDef(self, node: ast.ClassDef) -> None:
421420 # This way, if we don't have the class name, we can still find the static method
422421 for body_node in node .body :
423422 if (
424- isinstance (body_node , ast .FunctionDef )
425- and body_node .name == self .function_name
426- and body_node .lineno in {self .line_no , self .line_no + 1 }
427- and any (
428- isinstance (decorator , ast .Name ) and decorator .id == "staticmethod"
429- for decorator in body_node .decorator_list
430- )
423+ isinstance (body_node , ast .FunctionDef )
424+ and body_node .name == self .function_name
425+ and body_node .lineno in {self .line_no , self .line_no + 1 }
426+ and any (
427+ isinstance (decorator , ast .Name ) and decorator .id == "staticmethod"
428+ for decorator in body_node .decorator_list
429+ )
431430 ):
432431 self .is_staticmethod = True
433432 self .is_top_level = True
@@ -460,10 +459,7 @@ def inspect_top_level_functions_or_methods(
460459
461460
462461def check_optimization_status (
463- functions_by_file : dict [Path , list [FunctionToOptimize ]],
464- owner : str ,
465- repo : str ,
466- pr_number : int
462+ functions_by_file : dict [Path , list [FunctionToOptimize ]], owner : str , repo : str , pr_number : int
467463) -> tuple [dict [Path , list [FunctionToOptimize ]], int ]:
468464 """Check which functions have already been optimized and filter them out.
469465
@@ -480,6 +476,7 @@ def check_optimization_status(
480476
481477 Returns:
482478 Tuple of (filtered_functions_dict, remaining_count)
479+
483480 """
484481 # Build the code_contexts dictionary for the API call
485482 code_contexts = {}
@@ -500,7 +497,6 @@ def check_optimization_status(
500497 result = is_function_being_optimized_again (owner , repo , pr_number , code_contexts )
501498 already_optimized_paths = set (result .get ("already_optimized_paths" , []))
502499
503-
504500 # Filter out already optimized functions
505501 filtered_functions = defaultdict (list )
506502 remaining_count = 0
@@ -556,12 +552,12 @@ def filter_functions(
556552 test_functions_removed_count += len (_functions )
557553 continue
558554 if file_path in ignore_paths or any (
559- file_path .startswith (str (ignore_path ) + os .sep ) for ignore_path in ignore_paths
555+ file_path .startswith (str (ignore_path ) + os .sep ) for ignore_path in ignore_paths
560556 ):
561557 ignore_paths_removed_count += 1
562558 continue
563559 if file_path in submodule_paths or any (
564- file_path .startswith (str (submodule_path ) + os .sep ) for submodule_path in submodule_paths
560+ file_path .startswith (str (submodule_path ) + os .sep ) for submodule_path in submodule_paths
565561 ):
566562 submodule_ignored_paths_count += 1
567563 continue
@@ -579,12 +575,14 @@ def filter_functions(
579575 if blocklist_funcs :
580576 functions_tmp = []
581577 for function in _functions :
582- if not (
578+ if (
583579 function .file_path .name in blocklist_funcs
584580 and function .qualified_name in blocklist_funcs [function .file_path .name ]
585581 ):
582+ # This function is in blocklist, we can skip it
586583 blocklist_funcs_removed_count += 1
587584 continue
585+ # This function is NOT in blocklist. we can keep it
588586 functions_tmp .append (function )
589587 _functions = functions_tmp
590588
@@ -609,9 +607,7 @@ def filter_functions(
609607 owner , repo = get_repo_owner_and_name (repository )
610608 pr_number = get_pr_number ()
611609 if owner and repo and pr_number is not None :
612- path_based_functions , functions_count = check_optimization_status (
613- path_based_functions , owner , repo , pr_number
614- )
610+ path_based_functions , functions_count = check_optimization_status (path_based_functions , owner , repo , pr_number )
615611 initial_count = sum (len (funcs ) for funcs in filtered_modified_functions .values ())
616612 already_optimized_count = initial_count - functions_count
617613
@@ -652,8 +648,8 @@ def filter_files_optimized(file_path: Path, tests_root: Path, ignore_paths: list
652648 if submodule_paths is None :
653649 submodule_paths = ignored_submodule_paths (module_root )
654650 return not (
655- file_path in submodule_paths
656- or any (file_path .is_relative_to (submodule_path ) for submodule_path in submodule_paths )
651+ file_path in submodule_paths
652+ or any (file_path .is_relative_to (submodule_path ) for submodule_path in submodule_paths )
657653 )
658654
659655
@@ -662,4 +658,4 @@ def function_has_return_statement(function_node: FunctionDef | AsyncFunctionDef)
662658
663659
664660def function_is_a_property (function_node : FunctionDef | AsyncFunctionDef ) -> bool :
665- return any (isinstance (node , ast .Name ) and node .id == "property" for node in function_node .decorator_list )
661+ return any (isinstance (node , ast .Name ) and node .id == "property" for node in function_node .decorator_list )
0 commit comments