diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
index 9a6242f86..38e0fe96a 100644
--- a/.github/workflows/unit-tests.yaml
+++ b/.github/workflows/unit-tests.yaml
@@ -28,8 +28,8 @@ jobs:
       - name: install dependencies
         run: uv sync

-      - name: Install test-only dependencies (Python 3.13)
-        if: matrix.python-version == '3.13'
+      - name: Install test-only dependencies (Python 3.9 and 3.13)
+        if: matrix.python-version == '3.9' || matrix.python-version == '3.13'
         run: uv sync --group tests

       - name: Unit tests
diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index dc3a22a5a..590028824 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -255,6 +255,8 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
                 "optimized_code_runtime": opt.optimized_code_runtime,
                 "speedup": opt.speedup,
                 "trace_id": opt.trace_id,
+                "function_references": opt.function_references,
+                "python_version": platform.python_version(),
             }
             for opt in request
         ]
@@ -308,6 +310,7 @@ def get_new_explanation(  # noqa: D417
         original_throughput: str | None = None,
         optimized_throughput: str | None = None,
         throughput_improvement: str | None = None,
+        function_references: str | None = None,
     ) -> str:
         """Optimize the given python code for performance by making a request to the Django endpoint.

@@ -327,6 +330,7 @@ def get_new_explanation(  # noqa: D417
         - original_throughput: str | None - throughput for the baseline code (operations per second)
         - optimized_throughput: str | None - throughput for the optimized code (operations per second)
         - throughput_improvement: str | None - throughput improvement percentage
+        - function_references: str | None - where the function is called in the codebase

         Returns
         -------
@@ -349,6 +353,7 @@ def get_new_explanation(  # noqa: D417
             "original_throughput": original_throughput,
             "optimized_throughput": optimized_throughput,
             "throughput_improvement": throughput_improvement,
+            "function_references": function_references,
         }
         logger.info("loading|Generating explanation")
         console.rule()
@@ -373,7 +378,12 @@ def get_new_explanation(  # noqa: D417
         return ""

     def generate_ranking(  # noqa: D417
-        self, trace_id: str, diffs: list[str], optimization_ids: list[str], speedups: list[float]
+        self,
+        trace_id: str,
+        diffs: list[str],
+        optimization_ids: list[str],
+        speedups: list[float],
+        function_references: str | None = None,
     ) -> list[int] | None:
         """Optimize the given python code for performance by making a request to the Django endpoint.
@@ -382,6 +392,7 @@ def generate_ranking(  # noqa: D417
         - trace_id : unique uuid of function
         - diffs : list of unified diff strings of opt candidates
         - speedups : list of speedups of opt candidates
+        - function_references : where the function is called in the codebase

         Returns
         -------
@@ -394,6 +405,7 @@ def generate_ranking(  # noqa: D417
             "speedups": speedups,
             "optimization_ids": optimization_ids,
             "python_version": platform.python_version(),
+            "function_references": function_references,
         }
         logger.info("loading|Generating ranking")
         console.rule()
@@ -594,6 +606,7 @@ def get_optimization_review(
             "optimized_runtime": humanize_runtime(explanation.best_runtime_ns),
             "original_runtime": humanize_runtime(explanation.original_runtime_ns),
             "calling_fn_details": calling_fn_details,
+            "python_version": platform.python_version(),
         }
         console.rule()
         try:
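
The aiservice.py changes above thread two extra fields into the request payloads. As orientation only, a minimal sketch of the resulting payload shape; `build_refinement_payload` is a hypothetical stand-in for the client method, which sends more fields than shown here:

```python
# Hypothetical sketch of the refinement payload after this change: each entry now
# carries where the function is referenced plus the client's Python version.
import platform


def build_refinement_payload(requests):
    return [
        {
            "trace_id": opt.trace_id,
            "speedup": opt.speedup,
            "optimized_code_runtime": opt.optimized_code_runtime,
            "function_references": opt.function_references,  # new field
            "python_version": platform.python_version(),  # new field
        }
        for opt in requests
    ]
```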
diff --git a/codeflash/code_utils/code_extractor.py b/codeflash/code_utils/code_extractor.py
index 5335bad56..e90498936 100644
--- a/codeflash/code_utils/code_extractor.py
+++ b/codeflash/code_utils/code_extractor.py
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import ast
+import time
 from dataclasses import dataclass
 from itertools import chain
 from pathlib import Path
@@ -1138,6 +1139,7 @@ def find_specific_function_in_file(
 def get_fn_references_jedi(
     source_code: str, file_path: Path, project_root: Path, target_function: str, target_class: str | None
 ) -> list[Path]:
+    start_time = time.perf_counter()
     function_position: CodePosition = find_specific_function_in_file(
         source_code, file_path, target_function, target_class
     )
@@ -1146,6 +1148,8 @@ def get_fn_references_jedi(
     # Get references to the function
     references = script.get_references(line=function_position.line_no, column=function_position.col_no)
     # Collect unique file paths where references are found
+    end_time = time.perf_counter()
+    logger.debug(f"Jedi for function references ran in {end_time - start_time:.2f} seconds")
     reference_files = set()
     for ref in references:
         if ref.module_path:
@@ -1163,6 +1167,7 @@ def get_fn_references_jedi(
 def get_opt_review_metrics(
     source_code: str, file_path: Path, qualified_name: str, project_root: Path, tests_root: Path
 ) -> str:
+    start_time = time.perf_counter()
     try:
         qualified_name_split = qualified_name.rsplit(".", maxsplit=1)
         if len(qualified_name_split) == 1:
@@ -1176,4 +1181,6 @@ def get_opt_review_metrics(
     except Exception as e:
         calling_fns_details = ""
         logger.debug(f"Investigate {e}")
+    end_time = time.perf_counter()
+    logger.debug(f"Got function references in {end_time - start_time:.2f} seconds")
     return calling_fns_details
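
The timing added to code_extractor.py brackets the Jedi lookup with `time.perf_counter()` and logs the elapsed seconds at debug level. A small sketch, not part of the diff, of the same pattern factored into a reusable context manager:

```python
# Sketch only: perf_counter bracketing as a context manager; the PR logs inline
# via logger.debug instead.
import time
from contextlib import contextmanager


@contextmanager
def log_duration(label: str):
    start = time.perf_counter()
    try:
        yield
    finally:
        print(f"{label} ran in {time.perf_counter() - start:.2f} seconds")


# Example usage:
with log_duration("Jedi for function references"):
    sum(range(1_000_000))  # placeholder for script.get_references(...)
```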
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
index 84179054e..b27466c09 100644
--- a/codeflash/models/models.py
+++ b/codeflash/models/models.py
@@ -44,6 +44,7 @@ class AIServiceRefinerRequest:
     trace_id: str
     original_line_profiler_results: str
     optimized_line_profiler_results: str
+    function_references: str | None = None


 # If the method spam is in the class Ham, which is at the top level of the module eggs in the package foo, the fully
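
Because the new `AIServiceRefinerRequest` field defaults to `None`, existing construction sites that do not pass `function_references` keep working. A trimmed, hypothetical illustration:

```python
# Trimmed stand-in for AIServiceRefinerRequest, showing that the defaulted field
# is backward compatible with callers that omit it.
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class RefinerRequestSketch:
    trace_id: str
    original_line_profiler_results: str
    optimized_line_profiler_results: str
    function_references: str | None = None  # new optional field


req = RefinerRequestSketch(
    trace_id="abc123",
    original_line_profiler_results="",
    optimized_line_profiler_results="",
)
assert req.function_references is None
```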
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 093ff0966..3a0629adb 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -244,7 +244,7 @@ def __init__(
         ) = None
         n_tests = N_TESTS_TO_GENERATE_EFFECTIVE
         self.executor = concurrent.futures.ThreadPoolExecutor(
-            max_workers=n_tests + 2 if self.experiment_id is None else n_tests + 3
+            max_workers=n_tests + 3 if self.experiment_id is None else n_tests + 4
         )

     def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
@@ -286,6 +286,7 @@ def generate_and_instrument_tests(
             list[Path],
             set[Path],
             dict | None,
+            str,
         ]
     ]:
         """Generate and instrument tests, returning all necessary data for optimization."""
@@ -323,9 +324,14 @@ def generate_and_instrument_tests(
         generated_tests: GeneratedTestsList
         optimizations_set: OptimizationSet
-        count_tests, generated_tests, function_to_concolic_tests, concolic_test_str, optimizations_set = (
-            generated_results.unwrap()
-        )
+        (
+            count_tests,
+            generated_tests,
+            function_to_concolic_tests,
+            concolic_test_str,
+            optimizations_set,
+            function_references,
+        ) = generated_results.unwrap()

         for i, generated_test in enumerate(generated_tests.generated_tests):
             with generated_test.behavior_file_path.open("w", encoding="utf8") as f:
@@ -371,6 +377,7 @@
                 generated_perf_test_paths,
                 instrumented_unittests_created_for_function,
                 original_conftest_content,
+                function_references,
             )
         )

@@ -403,6 +410,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
             generated_perf_test_paths,
             instrumented_unittests_created_for_function,
             original_conftest_content,
+            function_references,
         ) = test_setup_result.unwrap()

         baseline_setup_result = self.setup_and_establish_baseline(
@@ -437,6 +445,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
             generated_tests=generated_tests,
             test_functions_to_remove=test_functions_to_remove,
             concolic_test_str=concolic_test_str,
+            function_references=function_references,
         )

         # Add function to code context hash if in gh actions
@@ -458,6 +467,7 @@ def determine_best_candidate(
         original_helper_code: dict[Path, str],
         file_path_to_helper_classes: dict[Path, set[str]],
         exp_type: str,
+        function_references: str,
     ) -> BestOptimization | None:
         best_optimization: BestOptimization | None = None
         _best_runtime_until_now = original_code_baseline.runtime
@@ -667,6 +677,7 @@ def determine_best_candidate(
                             else self.function_trace_id,
                             ai_service_client=ai_service_client,
                             executor=self.executor,
+                            function_references=function_references,
                         )
                     )
                 else:
@@ -753,6 +764,7 @@ def determine_best_candidate(
                     optimization_ids=optimization_ids,
                     speedups=speedups_list,
                     trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
+                    function_references=function_references,
                 )
                 concurrent.futures.wait([future_ranking])
                 ranking = future_ranking.result()
@@ -766,7 +778,7 @@ def determine_best_candidate(
                     min_key = min(overall_ranking, key=overall_ranking.get)
                 elif len(optimization_ids) == 1:
                     min_key = 0  # only one candidate in valid _opts, already returns if there are no valid candidates
-                else:  # 0? shouldn't happen but it's there to escape potential bugs
+                else:  # 0? shouldn't happen, but it's there to escape potential bugs
                     return None
                 best_optimization = valid_candidates_with_shorter_code[min_key]
                 # reassign code string which is the shortest
@@ -790,6 +802,7 @@ def refine_optimizations(
         trace_id: str,
         ai_service_client: AiServiceClient,
         executor: concurrent.futures.ThreadPoolExecutor,
+        function_references: str | None = None,
     ) -> concurrent.futures.Future:
         request = [
             AIServiceRefinerRequest(
@@ -804,6 +817,7 @@ def refine_optimizations(
                 trace_id=trace_id,
                 original_line_profiler_results=original_code_baseline.line_profile_results["str_out"],
                 optimized_line_profiler_results=opt.line_profiler_test_results["str_out"],
+                function_references=function_references,
             )
             for opt in valid_optimizations
         ]
@@ -1089,7 +1103,7 @@ def generate_tests_and_optimizations(
         generated_test_paths: list[Path],
         generated_perf_test_paths: list[Path],
         run_experiment: bool = False,  # noqa: FBT001, FBT002
-    ) -> Result[tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet], str]:
+    ) -> Result[tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet, str], str]:
         n_tests = N_TESTS_TO_GENERATE_EFFECTIVE
         assert len(generated_test_paths) == n_tests
         console.rule()
@@ -1116,7 +1130,15 @@ def generate_tests_and_optimizations(
         future_concolic_tests = self.executor.submit(
             generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast
         )
-        futures = [*future_tests, future_optimization_candidates, future_concolic_tests]
+        future_references = self.executor.submit(
+            get_opt_review_metrics,
+            self.function_to_optimize_source_code,
+            self.function_to_optimize.file_path,
+            self.function_to_optimize.qualified_name,
+            self.project_root,
+            self.test_cfg.tests_root,
+        )
+        futures = [*future_tests, future_optimization_candidates, future_concolic_tests, future_references]
         if run_experiment:
             future_candidates_exp = self.executor.submit(
                 self.local_aiservice_client.optimize_python_code,
@@ -1168,7 +1190,7 @@ def generate_tests_and_optimizations(
             logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}")
             return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}")
         function_to_concolic_tests, concolic_test_str = future_concolic_tests.result()
-
+        function_references = future_references.result()
         count_tests = len(tests)
         if concolic_test_str:
             count_tests += 1
@@ -1182,6 +1204,7 @@ def generate_tests_and_optimizations(
             function_to_concolic_tests,
             concolic_test_str,
             OptimizationSet(control=candidates, experiment=candidates_experiment),
+            function_references,
         )
         self.generate_and_instrument_tests_results = result
         return Success(result)
@@ -1263,6 +1286,7 @@ def find_and_process_best_optimization(
         generated_tests: GeneratedTestsList,
         test_functions_to_remove: list[str],
         concolic_test_str: str | None,
+        function_references: str,
     ) -> BestOptimization | None:
         """Find the best optimization candidate and process it with all required steps."""
         best_optimization = None
@@ -1279,6 +1303,7 @@ def find_and_process_best_optimization(
                     original_helper_code=original_helper_code,
                     file_path_to_helper_classes=file_path_to_helper_classes,
                     exp_type=exp_type,
+                    function_references=function_references,
                 )
                 ph(
                     "cli-optimize-function-finished",
@@ -1347,6 +1372,7 @@ def find_and_process_best_optimization(
                 exp_type,
                 original_helper_code,
                 code_context,
+                function_references,
             )

         return best_optimization
@@ -1364,6 +1390,7 @@ def process_review(
         exp_type: str,
         original_helper_code: dict[Path, str],
         code_context: CodeOptimizationContext,
+        function_references: str,
     ) -> None:
         coverage_message = (
             original_code_baseline.coverage_results.build_message()
@@ -1430,6 +1457,7 @@ def process_review(
                 original_throughput=original_throughput_str,
                 optimized_throughput=optimized_throughput_str,
                 throughput_improvement=throughput_improvement_str,
+                function_references=function_references,
             )
             new_explanation = Explanation(
                 raw_explanation_message=new_explanation_raw_str or explanation.raw_explanation_message,
@@ -1466,16 +1494,9 @@ def process_review(
         opt_review_response = ""
         if raise_pr or staging_review:
             data["root_dir"] = git_root_dir()
-            calling_fn_details = get_opt_review_metrics(
-                self.function_to_optimize_source_code,
-                self.function_to_optimize.file_path,
-                self.function_to_optimize.qualified_name,
-                self.project_root,
-                self.test_cfg.tests_root,
-            )
             try:
                 opt_review_response = self.aiservice_client.get_optimization_review(
-                    **data, calling_fn_details=calling_fn_details
+                    **data, calling_fn_details=function_references
                 )
             except Exception as e:
                 logger.debug(f"optimization review response failed, investigate {e}")
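
In function_optimizer.py, the reference lookup (`get_opt_review_metrics`) moves out of `process_review` and into `generate_tests_and_optimizations`, where it is submitted to the shared executor so it overlaps with test generation; the resulting string is then threaded through to ranking, refinement, explanation, and the optimization review. A simplified sketch of that flow, with assumed names:

```python
# Simplified sketch (assumed names): run the reference lookup concurrently with
# test generation on the shared executor, then unpack both results.
import concurrent.futures


def gather_results(executor, generate_tests, lookup_references):
    future_tests = executor.submit(generate_tests)
    future_references = executor.submit(lookup_references)  # e.g. get_opt_review_metrics(...)
    concurrent.futures.wait([future_tests, future_references])
    return future_tests.result(), future_references.result()


with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
    tests, function_references = gather_results(pool, lambda: ["test_a"], lambda: "called from module_b")
```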
diff --git a/pyproject.toml b/pyproject.toml
index 1186574c0..9473a0811 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,6 +94,7 @@ tests = [
     "scipy>=1.13.1",
     "torch>=2.8.0",
     "xarray>=2024.7.0",
+    "eval_type_backport"
 ]

 [tool.hatch.build.targets.sdist]
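
The new test-only dependency pairs with the workflow change at the top of this diff: on Python 3.9, PEP 604 unions written as string annotations (`str | None` under `from __future__ import annotations`) cannot be resolved by the standard library alone, which is the gap eval_type_backport is generally used to fill (pydantic, for instance, picks it up automatically when installed). A stdlib-only sketch of the failure mode this guards against, assuming that is indeed why the 3.9 job now installs the tests group:

```python
# Demonstrates (stdlib only) why resolving "str | None" annotations fails on 3.9.
from __future__ import annotations

import sys
import typing
from dataclasses import dataclass


@dataclass
class Example:
    function_references: str | None = None  # stored as the string "str | None"


if sys.version_info < (3, 10):
    try:
        typing.get_type_hints(Example)
    except TypeError as exc:
        # On 3.9, evaluating "str | None" raises; a backport rewrites the union
        # (e.g. to Optional[str]) before evaluation.
        print(f"annotation resolution failed: {exc}")
```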